library(LEA)
library(fields)
library(RColorBrewer)
library(mapplots)
library(geosphere)
library(reshape2)
library(ggplot2)
library(pegas)
library(ape)
library(dplyr)
library(vcfR)

#set and read
setwd("/Desktop/A/")
struct2geno("rc90cov_rand.stru", ploidy = 2, FORMAT = 2, extra.row = 0, extra.col = 1)

# running snmf #
#project.snmf <- snmf("rc90cov_rand.stru.geno", K = 1:5, entropy = TRUE, repetitions = 20, project = "new")
project.snmf <-  load.snmfProject("rc90cov_rand.stru.snmfProject")
#pdf("a_kplot.pdf", width = 12, height = 8)
plot(project.snmf, col = "blue4", cex = 1.4, pch = 19)
#dev.off()

#stats
#basic
aln <- read.dna("Munidopsis.fasta", format = "fasta")

haps <- haplotype(aln)
print(haps)

haploNet <- haploNet(haps)
print(haploNet)

h <- hap.div(aln)
cat("Haplotype diversity:", h, "\n")

pi <- nuc.div(aln)
cat("Nucleotide diversity (pi):", pi, "\n")

tajima <- tajima.test(aln)
print(tajima)

# data prep
# Read the VCF file
Munidopsis.vcf <- read.vcfR("rc90cov_rand.vcf")
# Examine the VCF data
print(Munidopsis.vcf)
# Load DNAbin
dnabin_object <- as.DNAbin("Munidopsis_sim_matrix")
# Check the DNAbin object
print(dnabin_object)
summary(dnabin_object)

# Calculate haplotypes
haps <- haplotype(dnabin_object)
print(haps)

# Calculate haplotype diversity
hap_diversity <- hap.div(haps)
print(hap_diversity)

# Calculate Nucleotide Diversity (π)
nucleotide_diversity <- nuc.div(dnabin_object)
print(nucleotide_diversity)

#Tajima's D Test 
tajima_test <- tajima.test(dnabin_object)
print(tajima_test)

# Calculating pairwise differences
pairwise_diffs <- dist.dna(dnabin_object, model = "raw", pairwise.deletion = TRUE)
pairwise_diffs

# Convert the pairwise distance matrix to a format suitable for plotting
pairwise_diffs_vec <- as.vector(pairwise_diffs)
# Plotting
hist(pairwise_diffs_vec, main = "Munidopsis Mismatch Distribution", xlab = "Pairwise Differences", ylab = "Frequency", col = "lightblue")

# Geographic distances between individuals
all_samples <- read.csv("locals.csv")
Munidopsis_individuals <- all_samples %>% filter(Species == "Munidopsis")
Munidopsis_individuals
# Calculate pairwise distances
# Extract coordinates
coordinates <- Munidopsis_individuals[, c("Longitude", "Latitude")]
# Calculate the distance matrix
distance_matrix <- distm(coordinates) / 1000  # Convert meters to kilometers
# Convert distance matrix to a data frame with IDs
# First, ensure row and column names using sample IDs
rownames(distance_matrix) <- Munidopsis_individuals$ID
colnames(distance_matrix) <- Munidopsis_individuals$ID
# Convert the matrix to a data frame
distance_df <- as.data.frame(as.table(as.matrix(distance_matrix)))
# Optionally change the column names for clarity
colnames(distance_df) <- c("ID1", "ID2", "Distance_km")
# Print the result
print(distance_df)
distance_df

# reshape data for plotting
pairwise_diffs
# Convert the genetic matrix to a data frame in long format
genetic_matrix <- as.matrix(pairwise_diffs)
genetic_matrix
genetic_df <- melt(genetic_matrix)
colnames(genetic_df) <- c("ID1", "ID2", "Genetic_Distance")

# Merge the geographic and genetic distances
combined_df <- merge(distance_df, genetic_df, by = c("ID1", "ID2"))
# Print the combined data frame
print(combined_df)
# plotting
# Load the packages
# Calculate correlation
cor_result <- cor.test(combined_df$Distance_km, combined_df$Genetic_Distance)
# Fit a linear model
lm_model <- lm(Genetic_Distance ~ Distance_km, data = combined_df)
# Extract the model coefficients
lm_coeffs <- coef(lm_model)